### use Risk and self-respect dataverse download folder as your working directory. Where needed, we refer to this as "yourWorkingDirectory"

# set working directory
# setwd("yourWorkingDirectory")

### Note: due to the size of the dataset and the person-by-person calculations that require iteration through time, some of the routines in this file take some time to complete. 

## load libraries (install if necessary)
library(foreign)
#library(reshape2)
library(splitstackshape)
library(sandwich)
library(AER)
library(MASS)
library(survival)

### download BHPS waves 1-18 data, in stata format, from the UK Data Service (see https://discover.ukdataservice.ac.uk/catalogue/?sn=5151 ) 

### place the resulting folder "UKDA-5151-stata8_BHPS1991to2008" in "yourWorkingDirectory"

### read in data from stata format -- data are "wide" rather than "long" format
a.ind = read.dta("UKDA-5151-stata8_BHPS1991to2008/stata8/aindresp.dta", convert.factors = F)
b.ind = read.dta("UKDA-5151-stata8_BHPS1991to2008/stata8/bindresp.dta", convert.factors = F)
c.ind = read.dta("UKDA-5151-stata8_BHPS1991to2008/stata8/cindresp.dta", convert.factors = F)
d.ind = read.dta("UKDA-5151-stata8_BHPS1991to2008/stata8/dindresp.dta", convert.factors = F)
e.ind = read.dta("UKDA-5151-stata8_BHPS1991to2008/stata8/eindresp.dta", convert.factors = F)
f.ind = read.dta("UKDA-5151-stata8_BHPS1991to2008/stata8/findresp.dta", convert.factors = F)

g.ind = read.dta("UKDA-5151-stata8_BHPS1991to2008/stata8/gindresp.dta", convert.factors = F)
h.ind = read.dta("UKDA-5151-stata8_BHPS1991to2008/stata8/hindresp.dta", convert.factors = F)
i.ind = read.dta("UKDA-5151-stata8_BHPS1991to2008/stata8/iindresp.dta", convert.factors = F)
j.ind = read.dta("UKDA-5151-stata8_BHPS1991to2008/stata8/jindresp.dta", convert.factors = F)
k.ind = read.dta("UKDA-5151-stata8_BHPS1991to2008/stata8/kindresp.dta", convert.factors = F)
l.ind = read.dta("UKDA-5151-stata8_BHPS1991to2008/stata8/lindresp.dta", convert.factors = F)

m.ind = read.dta("UKDA-5151-stata8_BHPS1991to2008/stata8/mindresp.dta", convert.factors = F)
n.ind = read.dta("UKDA-5151-stata8_BHPS1991to2008/stata8/nindresp.dta", convert.factors = F)
o.ind = read.dta("UKDA-5151-stata8_BHPS1991to2008/stata8/oindresp.dta", convert.factors = F)
p.ind = read.dta("UKDA-5151-stata8_BHPS1991to2008/stata8/pindresp.dta", convert.factors = F)
q.ind = read.dta("UKDA-5151-stata8_BHPS1991to2008/stata8/qindresp.dta", convert.factors = F)
r.ind = read.dta("UKDA-5151-stata8_BHPS1991to2008/stata8/rindresp.dta", convert.factors = F)

### make lists of variable names from each wave to select variables for analysis dataset(s)

keep.w = c("wpaygyr", "wfihhmn", "wfiyr", "wqfhas", "wqfachi", "wdoby", "wsex", "wjbisco", "wjbstat", "wmastat", "wghqj", "wghqk", "wghqc", "wghqd", "pid")

keep.a = c("afihhmn", "afiyr", "aqfhas", "aqfed", "aqfachi", "adoby", "asex", "ajbisco", "ajbstat", "amastat", "aghqj", "aghqk", "aghqc", "aghqd", "pid")
keep.b = c("bfihhmn", "bfiyr", "bqfhas", "bqfed", "bqfachi", "bdoby", "bsex", "bjbisco", "bjbstat", "bmastat", "bghqj", "bghqk", "bghqc", "bghqd", "pid")
keep.c = c("cfihhmn", "cfiyr", "cqfhas", "cqfed", "cqfachi", "cdoby", "csex", "cjbisco", "cjbstat", "cmastat", "cghqj", "cghqk", "cghqc", "cghqd", "pid")
keep.d = c("dfihhmn", "dfiyr", "dqfhas", "dqfed", "dqfachi", "ddoby", "dsex", "djbisco", "djbstat", "dmastat", "dghqj", "dghqk", "dghqc", "dghqd", "pid")
keep.e = c("efihhmn", "efiyr", "eqfhas", "eqfed", "eqfachi", "edoby", "esex", "ejbisco", "ejbstat", "emastat", "eghqj", "eghqk", "eghqc", "eghqd", "pid")
keep.f = c("ffihhmn", "ffiyr", "fqfhas", "fqfed", "fqfachi", "fdoby", "fsex", "fjbisco", "fjbstat", "fmastat", "fghqj", "fghqk", "fghqc", "fghqd", "pid")

a.ind = a.ind[ , keep.a]
b.ind = b.ind[ , keep.b]
c.ind = c.ind[ , keep.c]
d.ind = d.ind[ , keep.d]
e.ind = e.ind[ , keep.e]
f.ind = f.ind[ , keep.f]

wide.ab = merge(a.ind, b.ind, by = "pid", all.x = T, all.y = T)
wide.ac = merge(wide.ab, c.ind, by = "pid", all.x =T, all.y = T)
rm(wide.ab)
wide.ad = merge(wide.ac, d.ind, by = "pid", all.x =T, all.y = T)
wide.ae = merge(wide.ad, e.ind, by = "pid", all.x =T, all.y = T)
wide.af = merge(wide.ae, f.ind, by = "pid", all.x =T, all.y = T)
rm(wide.ac, wide.ad, wide.ae)

## g to l
keep.g = c("gfihhmn", "gfiyr", "gqfhas", "gqfed", "gqfachi","gdoby", "gsex", "gjbisco", "gjbstat", "gmastat", "gghqj", "gghqk", "gghqc", "gghqd", "pid")

keep.h = c("hfihhmn", "hfiyr", "hqfhas", "hqfed", "hqfachi", "hdoby", "hsex", "hjbisco", "hjbstat", "hmastat", "hghqj", "hghqk", "hghqc", "hghqd", "pid")

keep.i = c("ifihhmn", "ifiyr", "iqfhas", "iqfed","iqfachi", "idoby", "isex", "ijbisco", "ijbstat", "imastat", "ighqj", "ighqk", "ighqc", "ighqd", "pid")

keep.j = c("jfihhmn", "jfiyr", "jqfhas", "jqfed", "jqfachi", "jdoby", "jsex", "jjbisco", "jjbstat", "jmastat", "jghqj", "jghqk", "jghqc", "jghqd", "pid")

keep.k = c("kfihhmn", "kfiyr", "kqfhas", "kqfed", "kqfachi", "kdoby", "ksex", "kjbisco", "kjbstat", "kmastat", "kghqj", "kghqk", "kghqc", "kghqd", "pid")

keep.l = c("lfihhmn", "lfiyr", "lqfhas", "lqfed", "lqfachi", "ldoby", "lsex", "ljbisco", "ljbstat", "lmastat", "lghqj", "lghqk", "lghqc", "lghqd", "pid")

g.ind = g.ind[ , keep.g]
h.ind = h.ind[ , keep.h]
i.ind = i.ind[ , keep.i]
j.ind = j.ind[ , keep.j]
k.ind = k.ind[ , keep.k]
l.ind = l.ind[ , keep.l]
wide.ag = merge(wide.af, g.ind, by = "pid", all.x =T, all.y = T)
wide.ah = merge(wide.ag, h.ind, by = "pid", all.x =T, all.y = T)
wide.ai = merge(wide.ah, i.ind, by = "pid", all.x =T, all.y = T)
wide.aj = merge(wide.ai, j.ind, by = "pid", all.x =T, all.y = T)
wide.ak = merge(wide.aj, k.ind, by = "pid", all.x =T, all.y = T)
wide.al = merge(wide.ak, l.ind, by = "pid", all.x =T, all.y = T)
rm(wide.af, wide.ag, wide.ah, wide.ai, wide.aj, wide.ak)

## m to r
keep.m = c("mfihhmn", "mfiyr", "mqfhas", "mqfed", "mqfachi", "mdoby", "msex", "mjbisco", "mjbstat", "mmastat", "mghqj", "mghqk", "mghqc", "mghqd", "pid")
keep.n = c("nfihhmn", "nfiyr", "nqfhas", "nqfed", "nqfachi", "ndoby", "nsex", "njbisco", "njbstat", "nmastat", "nghqj", "nghqk", "nghqc", "nghqd", "pid")
keep.o = c("ofihhmn", "ofiyr", "oqfhas", "oqfed", "oqfachi", "odoby", "osex", "ojbisco", "ojbstat", "omastat", "oghqj", "oghqk", "oghqc", "oghqd", "pid")
keep.p = c("pfihhmn", "pfiyr", "pqfhas", "pqfed", "pqfachi", "pdoby", "psex", "pjbisco", "pjbstat", "pmastat", "pghqj", "pghqk", "pghqc", "pghqd", "pid")
keep.q = c("qfihhmn", "qfiyr", "qqfhas", "qqfed", "qqfachi", "qdoby", "qsex", "qjbisco", "qjbstat", "qmastat", "qghqj", "qghqk", "qghqc", "qghqd", "pid")
keep.r = c("rfihhmn", "rfiyr", "rqfhas", "rqfed", "rqfachi", "rdoby", "rsex", "rjbisco", "rjbstat", "rmastat", "rghqj", "rghqk", "rghqc", "rghqd", "pid")

m.ind = m.ind[ , keep.m]
n.ind = n.ind[ , keep.n]
o.ind = o.ind[ , keep.o]
p.ind = p.ind[ , keep.p]
q.ind = q.ind[ , keep.q]
r.ind = r.ind[ , keep.r]
wide.am = merge(wide.al, m.ind, by = "pid", all.x =T, all.y = T)
wide.an = merge(wide.am, n.ind, by = "pid", all.x =T, all.y = T)
wide.ao = merge(wide.an, o.ind, by = "pid", all.x =T, all.y = T)
wide.ap = merge(wide.ao, p.ind, by = "pid", all.x =T, all.y = T)
wide.aq = merge(wide.ap, q.ind, by = "pid", all.x =T, all.y = T)
wide.ar = merge(wide.aq, r.ind, by = "pid", all.x =T, all.y = T)
rm(wide.al, wide.am, wide.an, wide.ao, wide.ap, wide.aq)

## fix names for reshape:
# save(wide.ar, file = "bhps_use_ar.RData")
#load("bhps_use_ar.RData")
newnames = paste(substr(names(wide.ar)[2:253], 2, nchar(names(wide.ar)[2:253])), substr(names(wide.ar)[2:253], 1, 1), sep = "_")
names(wide.ar)[2:253] = newnames

long.ar = merged.stack(wide.ar, id.vars="pid", var.stubs=c("fihhmn", "fiyr", "qfhas", "qfed",   "qfachi", "doby", "sex", "jbisco", "jbstat", "mastat", "ghqj", "ghqk", "ghqc", "ghqd"), sep="_")
names(long.ar)[2] = "wave"

###### some cleaning:
## drop those where gender, age are missing (these are all-NA rows)
long.ar = long.ar[ - which(is.na(long.ar$sex)==T),]

## trim some white space in isco codes
long.ar$jbisco = trimws(long.ar$jbisco, "both") 

## convert some isco codes from 2 or 3 to 4 digits:
long.ar$jbisco[which(long.ar$jbisco == "2---")] = "2000"
long.ar$jbisco[which(long.ar$jbisco == "74--")] = "7400"
long.ar$jbisco[which(long.ar$jbisco == "812-")] = "8120"
long.ar$jbisco[which(long.ar$jbisco == "131-")] = "1310"
long.ar$jbisco[which(long.ar$jbisco == "122-")] = "1220"
long.ar$jbisco[which(long.ar$jbisco == "311-")] = "3110"
long.ar$jbisco[which(long.ar$jbisco == "827-")] = "8270"
long.ar$jbisco[which(long.ar$jbisco == "214-")] = "2140"
long.ar$jbisco[which(long.ar$jbisco == "815-")] = "8150"
long.ar$jbisco[which(long.ar$jbisco == "822-")] = "8220"
long.ar$jbisco[which(long.ar$jbisco == "713-")] = "7130"
long.ar$jbisco[which(long.ar$jbisco == "----")] = NA
long.ar$jbisco = as.numeric(long.ar$jbisco)

## recode as NA missing and na values
long.ar$fihhmn[which(long.ar$fihhmn < 0)] = NA
long.ar$fiyr[which(long.ar$fiyr < 0)] = NA
long.ar$qfhas[which(long.ar$qfhas < 0)] = NA
long.ar$qfed[which(long.ar$qfed < 0)] = NA
long.ar$doby[which(long.ar$doby < 0)] = NA
long.ar$sex[which(long.ar$sex < 0)] = NA
long.ar$jbisco[which(long.ar$jbisco < 0)] = NA
long.ar$jbstat[which(long.ar$jbstat < 0)] = NA
long.ar$mastat[which(long.ar$mastat < 0)] = NA
long.ar$ghqj[which(long.ar$ghqj < 0)] = NA
long.ar$ghqk[which(long.ar$ghqk < 0)] = NA
long.ar$ghqc[which(long.ar$ghqc < 0)] = NA
long.ar$ghqd[which(long.ar$ghqd < 0)] = NA

### Save out the basic merged analysis dataset
save(long.ar, file = "bhps_use_ar_long.RData")

### creating variables for analysis

## numeric wave variable
 x = seq(1:18)
 alphaw = letters[1:18]
 alphset = as.data.frame(alphaw)
 alphset$x = x
 
 long.ar$wave.numeric = rep(NA, nrow(long.ar))
 for(i in 1:18){
 long.ar$wave.numeric[which(long.ar$wave == alphset$alphaw[i])] = x[i]
 }

## playing a useful part recently + (negatively scored) thinking of self as worthless person
long.ar$worth = ((5 - long.ar$ghqc) + (5 - long.ar$ghqk)) -2 # subtract 2 to get 0-6 scale instead of 2-8
# confidence in ability to hold self to plans
## capable of making decisions + (negatively scored) losing confidence
long.ar$planconf = ((5 - long.ar$ghqd) + (5 - long.ar$ghqj)) -2

long.ar$rsr1 = long.ar$worth + long.ar$planconf

long.ar$rsrb1s = as.numeric(long.ar$ghqc == 1 & long.ar$ghqd == 1 & long.ar$ghqj == 1 & long.ar$ghqk == 1 ) # only 4% of people with RSR

long.ar$rsrb2ss = as.numeric((long.ar$ghqc == 1 | long.ar$ghqc == 2) & (long.ar$ghqd == 1 | long.ar$ghqd == 2) & long.ar$ghqj == 1 & long.ar$ghqk == 1 ) # 42% of people with RSR

long.ar$rsrb4ss = as.numeric(long.ar$rsr1 > 9) # 48% of people with RSR

long.ar$rsrb3g = as.numeric((long.ar$ghqc == 1 | long.ar$ghqc == 2) & (long.ar$ghqd == 1 | long.ar$ghqd == 2) & (long.ar$ghqj == 1 | long.ar$ghqj == 2) & (long.ar$ghqk == 1 | long.ar$ghqk == 2)) # 76% of people with RSR

## marital status, recoded
long.ar$marital = as.numeric(long.ar$mastat == 1 | long.ar$mastat == 2)#married or living as couple
long.ar$marital[which(long.ar$mastat == 3)] = 3 # widowed
long.ar$marital[which(long.ar$mastat == 4 | long.ar$mastat == 5)] = 2 # sep/div
long.ar$marital[which(long.ar$mastat == 6)] = 0 # never married

# as factor
long.ar$marital.f = as.factor(long.ar$marital)

## log of annual income
long.ar$log.income = log(long.ar$fiyr + 1)

## rescaled linear income (in the tens of thousands)
long.ar$income10ks = long.ar$fiyr/10000

## time variables: year and age
long.ar$year = long.ar$wave.numeric + 1990
long.ar$age = long.ar$year - long.ar$doby

### two- and one-digit isco codes in long.ar:
long.ar$jbisco = as.character(long.ar$jbisco)
long.ar$isco1 = substr(long.ar$jbisco, start = 1, stop = 1)
long.ar$isco2 = substr(long.ar$jbisco, start = 1, stop = 2)

### load occupational unemployment rate (by gender) data.
### these data originally from the Office for National Statistics, downloaded on May 8 2017
### Originals at: http://webarchive.nationalarchives.gov.uk/20160105160709/http://www.ons.gov.uk/ons/rel/lms/labour-market-statistics/december-2015/table-unem02.xls

## selected variables/years and R-readable format:
our = read.csv("occupationalUnempRiskForBHPS.csv")

## merge by year-gender-occupation:
pull.our = function(rowInLongAR){
	s = NA
	ourate = NA
	if(long.ar$sex[rowInLongAR] == 1){  # men
		s= "m"}
	else if (long.ar$sex[rowInLongAR] == 2){ # women
		s = "w"}
	#if(is.na(long.ar$isco1[rowInLongAR])== F){
	isco = long.ar$isco1[rowInLongAR]
	if(is.na(isco) == F){
	colind = paste(s, isco, sep = ".")
	ourate = our[which(our$year == long.ar$year[rowInLongAR]), colind]
	}
	else{ourate = NA}
	ourate
	#c(isco, s, ourate)
	}

### attach gender-occupation unemployment rates to respondents:

## only run on observations with gender, occupation, year details
genoccobs = c(which(long.ar$year > 2000 & is.na(long.ar$sex)==F & is.na(long.ar$isco1)==F))
#restart = genoccobs[which(genoccobs > 51844)]
long.ar$gour = rep(NA, nrow(long.ar))
#for(i in 1:nrow(long.ar)){
for(i in genoccobs){
	long.ar$gour[i] = pull.our(i)
}
## this may still take a little time (couple of hours) to run

## skill specificity data
sksp = read.csv("skspForBHPS.csv")
## original data from Iversen, Cusack and Rehm, available at http://www.people.fas.harvard.edu/~iversen/SkillSpecificity.htm

long.ar$isco2 = as.numeric(long.ar$isco2)
long.ar = merge(long.ar, sksp, by = "isco2", all.x = T)

long.ar$pidf = as.factor(long.ar$pid)

# calculate mean, continuous version
indivs = unique(long.ar$pid)
mean.rsr.i = rep(NA, length(indivs))

for(i in 1:length(indivs)){
	mean.rsr.i[i] = mean(long.ar$rsr1[which(long.ar$pid == indivs[i])], na.rm = T)
}

rsrfe = as.data.frame(cbind(indivs, mean.rsr.i))

long.ar = merge(long.ar, rsrfe, by.x = "pid", by.y = "indivs", all.x = T)
long.ar$rsr1.demeaned = long.ar$rsr1 - long.ar$mean.rsr.i

mean.rsrb2ss.i = rep(NA, length(indivs))

for(i in 1:length(indivs)){
	mean.rsrb2ss.i[i] = mean(long.ar$rsrb2ss[which(long.ar$pid == indivs[i])], na.rm = T)
}

rsrfe$mean.rsrb2ss.i = mean.rsrb2ss.i

long.ar = merge(long.ar, rsrfe[ , c("indivs", "mean.rsrb2ss.i")], by.x = "pid", by.y = "indivs", all.x = T)
long.ar$rsrb2ss.demeaned = long.ar$rsrb2ss - long.ar$mean.rsrb2ss.i

## generous SR version:
mean.rsrb3g.i = rep(NA, length(indivs))

for(i in 1:length(indivs)){
	mean.rsrb3g.i[i] = mean(long.ar$rsrb3g[which(long.ar$pid == indivs[i])], na.rm = T)
}

rsrfe$mean.rsrb3g.i = mean.rsrb3g.i

long.ar = merge(long.ar, rsrfe[ , c("indivs", "mean.rsrb3g.i")], by.x = "pid", by.y = "indivs", all.x = T)
long.ar$rsrb3g.demeaned = long.ar$rsrb3g - long.ar$mean.rsrb3g.i

long.ar$wave.f = as.factor(long.ar$wave.numeric)

## some time-varying things: marital status and income
long.ar$marital = as.numeric(long.ar$mastat == 1 | long.ar$mastat == 2)#married or living as couple
long.ar$marital[which(long.ar$mastat == 3)] = 3 # widowed
long.ar$marital[which(long.ar$mastat == 4 | long.ar$mastat == 5)] = 2 # sep/div
long.ar$marital[which(long.ar$mastat == 6)] = 0 # never married

## log of annual income
long.ar$log.income = log(long.ar$fiyr + 1)

## rescaled linear income (in the tens of thousands)
long.ar$income10ks = long.ar$fiyr/10000
long.ar$income10ks2 = long.ar$income10ks^2

long.ar$marital.f = as.factor(long.ar$marital)

long.ar$year = long.ar$wave.numeric + 1990
long.ar$age = long.ar$year - long.ar$doby
long.ar$age2 = long.ar$age^2

## education
long.ar$edqual = long.ar$qfachi
long.ar$edqual[which(long.ar$qfachi < 0)] = NA
long.ar$edqual.f = as.factor(long.ar$edqual)

## "bad outcomes" -- for overcoming adversity analyses
long.ar$risk.status1 = rep(NA, nrow(long.ar)) # narrow version of non-risk: employment and self employment only (so maternity, retirement, students all dropped as NA)
long.ar$risk.status1[which(long.ar$jbstat == 1 | long.ar$jbstat == 2)] = 0
long.ar$risk.status1[which(long.ar$jbstat == 3 |  long.ar$jbstat == 8 | long.ar$jbstat == 10)] = 1

long.ar$risk.status2 = rep(NA, nrow(long.ar)) # broad version of non-risk: employment, self employment, maternity, family care, retirement, students = 0)
long.ar$risk.status2[which(long.ar$jbstat == 1 | long.ar$jbstat == 2  | long.ar$jbstat == 4  | long.ar$jbstat == 5  | long.ar$jbstat == 6 | long.ar$jbstat == 7)] = 0
long.ar$risk.status2[which(long.ar$jbstat == 3 |  long.ar$jbstat == 8 | long.ar$jbstat == 10)] = 1

long.ar$self.employed = as.numeric(long.ar$jbstat == 1)

## create indicator of number (if any) of previous times spent at risk

unique.people = unique(long.ar$pid)

## first, previous-but-ended, ever, ongoing (length of spell) at-risk:
ever.ar = rep(NA, length(unique.people))
ever.ar2 = rep(NA, length(unique.people)) # propagates NAs, so if ever NA, don't get data on ever.ar

for(i in 1:length(unique.people)){
	ever.ar[i] = as.numeric(1 %in% long.ar$risk.status1[which(long.ar$pid == unique.people[i])])
	ever.ar2[i] = as.numeric(sum(long.ar$risk.status1[which(long.ar$pid == unique.people[i])])>0)
	}

first.ar = rep(NA, length(unique.people))
first.ar2 = rep(NA, length(unique.people))
## "missing" first ars can't be zero, need to be > 18
first.ar[which(ever.ar == 0)] = 999
first.ar2[which(ever.ar2 == 0)] = 999

for(i in which(ever.ar == 1)){
	first.ar[i]= min(long.ar$wave.n[which(long.ar$pid == unique.people[i] & long.ar$risk.status1 == 1)])
	}
for(i in which(ever.ar2 == 1)){
	first.ar2[i]= min(long.ar$wave.n[which(long.ar$pid == unique.people[i] & long.ar$risk.status1 == 1)])
}

first.ar[which(first.ar > 18)] = NA
first.ar2[which(first.ar2 > 18)] = NA

everatrisk = as.data.frame(cbind(unique.people, ever.ar, ever.ar2, first.ar, first.ar2))

#ridcols =c(grep("ever.ar", names(long.ar)), grep("first.ar", names(long.ar)),  grep("prev.ar", names(long.ar)))
#long.ar= long.ar[ , -ridcols]

long.ar = merge(long.ar, everatrisk, by.x = "pid", by.y = "unique.people", all.x = T)
	
long.ar$first.obs.ar = as.numeric(long.ar$wave.numeric == long.ar$first.ar)
	
prev.ar = rep(NA, nrow(long.ar))
prev.ar[which(long.ar$wave.numeric > long.ar$first.ar)] = 1
prev.ar[which(long.ar$wave.numeric <= long.ar$first.ar)] = 0
prev.ar[which(long.ar$ever.ar == 0)] = 0
long.ar$prev.ar   = prev.ar

prev.ar2 = rep(NA, nrow(long.ar))
prev.ar2[which(long.ar$ever.ar2 == 0)] = 0
prev.ar2[which(long.ar$wave.numeric <= long.ar$first.ar2)] = 0
prev.ar2[which(long.ar$wave.numeric > long.ar$first.ar2)] = 1
long.ar$prev.ar2   = prev.ar2

### find periods of bad outcomes, successfully resolved
thesecols = c(grep("risk.status1", names(long.ar)), grep("wave.n", names(long.ar)))

find.bounded = function(rowInLAX){
	current = long.ar$wave.n[rowInLAX]
	first.ar = long.ar$first.ar[rowInLAX]
		## relevant data
	rel.i.obs = long.ar[which(long.ar$pid == long.ar$pid[rowInLAX] & long.ar$wave.n <= current), ..thesecols]
 	rel.i.obs = rel.i.obs[order(rel.i.obs$wave.numeric),]

## is there a zero in a wave number bigger than first.ar
	since.first = rel.i.obs$risk.status[which(rel.i.obs$wave.numeric > first.ar)]
	out = as.numeric(0 %in% since.first)
	out	
	#list(rel.i.obs, first.ar, since.first, out)
	}

bounded.prev.ar = rep(NA, nrow(long.ar))
bounded.prev.ar[which(long.ar$ever.ar == 0)] = 0
bounded.prev.ar[which(long.ar$prev.ar == 0)] = 0

for(i in which(long.ar$prev.ar == 1)){
	bounded.prev.ar[i] = find.bounded(i)
}
long.ar$bounded.prev.ar = bounded.prev.ar

# new var (individual-specific): mean not-at-risk rsr
mean.nar.rsr.i = rep(NA, length(unique.people))

for(i in 1:length(indivs)){
	mean.nar.rsr.i[i] = mean(long.ar$rsr1[which(long.ar$pid == unique.people[i] & long.ar$risk.status1 == 0)], na.rm = T)
}

mean.nar.rsrb2ss.i = rep(NA, length(unique.people))

for(i in 1:length(indivs)){
	mean.nar.rsrb2ss.i[i] = mean(long.ar$rsrb2ss[which(long.ar$pid == unique.people[i]  & long.ar$risk.status1 == 0)], na.rm = T)
}

nar.rsrfe = as.data.frame(cbind(unique.people, mean.nar.rsr.i))
nar.rsrfe$mean.nar.rsrb2ss.i = mean.nar.rsrb2ss.i

## and individual-specific ar mean rsr:

mean.ar.rsr.i = rep(NA, length(unique.people))

for(i in 1:length(indivs)){
	mean.ar.rsr.i[i] = mean(long.ar$rsr1[which(long.ar$pid == unique.people[i] & long.ar$risk.status1 == 1)], na.rm = T)
}

mean.ar.rsrb2ss.i = rep(NA, length(unique.people))

for(i in 1:length(indivs)){
	mean.ar.rsrb2ss.i[i] = mean(long.ar$rsrb2ss[which(long.ar$pid == unique.people[i]  & long.ar$risk.status1 == 1)], na.rm = T)
}

nar.rsrfe$mean.ar.rsrb2ss.i = mean.ar.rsrb2ss.i

long.ar = merge(long.ar, nar.rsrfe, by.x = "pid", by.y = "unique.people", all.x = T)

long.ar$rsrb2ss.demeaned.nar = long.ar$rsrb2ss - long.ar$mean.nar.rsrb2ss.i.x
long.ar$rsrb2ss.demeaned.ar = long.ar$rsrb2ss - long.ar$mean.ar.rsrb2ss.i

